#### EXERCICIO 8 - OLIDAN POCIUS
#### GALILEO
# Seven paired observations: `init.h` is the predictor and `h.d` the response.
# NOTE(review): presumably release height and horizontal distance from
# Galileo's experiment -- confirm names/units against the exercise statement.
init.h <- c(600, 700, 800, 950, 1100, 1300, 1500)
h.d <- c(253, 337, 395, 451, 495, 534, 573)
plot(h.d ~ init.h)

## Nested polynomial models of increasing degree, each drawn on the scatter
## plot created above.

# Degree 0: intercept only, drawn as a horizontal line.
modelo.0 <- lm(h.d ~ 1)
summary(modelo.0)
abline(modelo.0)

# Degree 1: straight line (green).
modelo.1 <- lm(h.d ~ init.h)
abline(modelo.1, col = "green")

# Degree 2: quadratic (red, dashed). I() keeps ^ as arithmetic inside the
# formula instead of formula crossing.
modelo.2 <- lm(h.d ~ init.h + I(init.h^2))
cf.m2 <- coef(modelo.2)
curve(cf.m2[1] + cf.m2[2] * x + cf.m2[3] * x^2,
      add = TRUE, lty = 2, col = "red")

# Degree 3: cubic (blue, dashed).
modelo.3 <- lm(h.d ~ init.h + I(init.h^2) + I(init.h^3))
cf.m3 <- coef(modelo.3)
curve(cf.m3[1] + cf.m3[2] * x + cf.m3[3] * x^2 + cf.m3[4] * x^3,
      add = TRUE, lty = 2, col = "blue")

# Sequential F tests between the nested models.
anova(modelo.0, modelo.1, modelo.2, modelo.3)
# Recorded output:
#Analysis of Variance Table
#Model 1: h.d ~ 1
#Model 2: h.d ~ init.h
#Model 3: h.d ~ init.h + I(init.h^2)
#Model 4: h.d ~ init.h + I(init.h^2) + I(init.h^3)
#  Res.Df   RSS Df Sum of Sq       F    Pr(>F)    
#1      6 77022                                   
#2      5  5671  1     71351 4435.98 7.458e-06 ***
#3      4   744  1      4927  306.33 0.0004065 ***
#4      3    48  1       696   43.26 0.0071503 **
summary(modelo.1)
#Multiple R-squared: 0.9264,  Adjusted R-squared: 0.9116 
summary(modelo.2)
#Multiple R-squared: 0.9903,  Adjusted R-squared: 0.9855 
summary(modelo.3)
#Multiple R-squared: 0.9994,  Adjusted R-squared: 0.9987 
### Conclusion: adding one more (cubic) term improves the model
### significantly, but the improvement is small -- a little over 1% of
### explained variance compared with model 2.

### Newborn mass (babies.txt)

# Read the raw table from the working directory. The first line of the file
# supplies the column names; as.is = TRUE leaves character columns
# unconverted.
RN.original <- read.table("babies.txt", header = TRUE, as.is = TRUE)

RN.original
str(RN.original)

# Per-column codes whose rows are excluded from the analysis.
# NOTE(review): presumably the data set's "unknown value" markers -- confirm
# against the data dictionary.
missing.codes <- c(bwt = 999, gestation = 999, parity = 9, height = 99,
                   smoke = 9, age = 99, weight = 999)
stopifnot(all(names(missing.codes) %in% names(RN.original)))

# A row is kept only when every coded column differs from its code. The
# explicit is.na() test makes a row containing NA drop out cleanly; a bare
# `!=` comparison would yield NA and index an all-NA phantom row instead.
is.observed <- Reduce(`&`, lapply(names(missing.codes), function(v) {
  x <- RN.original[[v]]
  !is.na(x) & x != missing.codes[[v]]
}))
RN <- RN.original[is.observed, ]
str(RN)

# Treat parity as a two-level (FALSE/TRUE) predictor: zero becomes FALSE and
# any non-zero value becomes TRUE.
RN$parity <- as.logical(RN$parity)
# Single-predictor screening: regress bwt on each candidate predictor in turn.
# The commented lines after each summary() are the recorded console output.

# bwt ~ gestation
m.1 <- lm(bwt ~ gestation, data = RN)
summary(m.1)
#Multiple R-squared: 0.1661,  Adjusted R-squared: 0.1654 
#F-statistic: 233.4 on 1 and 1172 DF,  p-value: < 2.2e-16

# bwt ~ parity -- not significant
m.2 <- lm(bwt ~ parity, data = RN)
summary(m.2)
#Residual standard error: 18.32 on 1172 degrees of freedom
#Multiple R-squared: 0.001928,  Adjusted R-squared: 0.001076
#F-statistic: 2.264 on 1 and 1172 DF,  p-value: 0.1327 

# bwt ~ age -- not significant
m.3 <- lm(bwt ~ age, data = RN)
summary(m.3)
#Multiple R-squared: 0.0007281,  Adjusted R-squared: -0.0001245
#F-statistic: 0.8539 on 1 and 1172 DF,  p-value: 0.3556 

# bwt ~ height
m.4 <- lm(bwt ~ height, data = RN)
summary(m.4)
#Multiple R-squared: 0.0415,  Adjusted R-squared: 0.04068 
#F-statistic: 50.74 on 1 and 1172 DF,  p-value: 1.838e-12 

# bwt ~ weight
m.5 <- lm(bwt ~ weight, data = RN)
summary(m.5)
#Multiple R-squared: 0.02431,  Adjusted R-squared: 0.02348 
#F-statistic:  29.2 on 1 and 1172 DF,  p-value: 7.887e-08

# bwt ~ smoke
m.6 <- lm(bwt ~ smoke, data = RN)
summary(m.6)
#Multiple R-squared: 0.06091,  Adjusted R-squared: 0.06011 
#F-statistic: 76.02 on 1 and 1172 DF,  p-value: < 2.2e-16
### NO INTERACTIONS -- only the predictors that were significant on their own
modelo1 <- lm(bwt ~ gestation + smoke + height + weight, data = RN)
summary(modelo1)
#Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
#(Intercept) -77.25871   14.05139  -5.498 4.71e-08 ***
#gestation     0.43718    0.02909  15.028  < 2e-16 ***
#smoke        -8.34833    0.95453  -8.746  < 2e-16 ***
#height        1.09733    0.20463   5.363 9.88e-08 ***
#weight        0.05981    0.02491   2.401   0.0165 *  
#Residual standard error: 15.88 on 1169 degrees of freedom
#Multiple R-squared: 0.2519,  Adjusted R-squared: 0.2493 
#F-statistic: 98.39 on 4 and 1169 DF,  p-value: < 2.2e-16 

### NO INTERACTIONS -- all predictors
# update() refits modelo1 with the extra terms appended, i.e.
# bwt ~ gestation + smoke + height + weight + parity + age on the same data.
modelo1b <- update(modelo1, . ~ . + parity + age)
summary(modelo1b)
#Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
#(Intercept) -80.41085   14.34657  -5.605 2.60e-08 ***
#gestation     0.44398    0.02910  15.258  < 2e-16 ***
#smoke        -8.40073    0.95382  -8.807  < 2e-16 ***
#height        1.15402    0.20502   5.629 2.27e-08 ***
#weight        0.05017    0.02524   1.987  0.04711 *  
#parityTRUE   -3.32720    1.12895  -2.947  0.00327 ** 
#age          -0.00895    0.08582  -0.104  0.91696    ### age not significant
#Residual standard error: 15.83 on 1167 degrees of freedom
#Multiple R-squared: 0.258,  Adjusted R-squared: 0.2541 
#F-statistic: 67.61 on 6 and 1167 DF,  p-value: < 2.2e-16

### NO INTERACTIONS -- without age
# bwt ~ gestation + smoke + height + weight + parity
modelo1c <- update(modelo1, . ~ . + parity)
summary(modelo1c)
#Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
#(Intercept) -80.71321   14.04465  -5.747 1.16e-08 ***
#gestation     0.44408    0.02907  15.276  < 2e-16 ***
#smoke        -8.39390    0.95117  -8.825  < 2e-16 ***
#height        1.15497    0.20473   5.641 2.11e-08 ***
#weight        0.04983    0.02503   1.991  0.04672 *  
#parityTRUE   -3.28762    1.06281  -3.093  0.00203 ** 
#Residual standard error: 15.82 on 1168 degrees of freedom
#Multiple R-squared: 0.2579,  Adjusted R-squared: 0.2548 
#F-statistic:  81.2 on 5 and 1168 DF,  p-value: < 2.2e-16

### NO INTERACTIONS -- without parity
# bwt ~ gestation + smoke + height + weight + age
modelo1d <- update(modelo1, . ~ . + age)
summary(modelo1d)
#Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
#(Intercept) -80.18172   14.39348  -5.571 3.15e-08 ***
#gestation     0.43872    0.02914  15.056  < 2e-16 ***
#smoke        -8.29494    0.95628  -8.674  < 2e-16 ***
#height        1.11130    0.20518   5.416 7.38e-08 ***
#weight        0.05597    0.02525   2.217   0.0268 *  
#age           0.07607    0.08109   0.938   0.3484    ##### age not significant
#Residual standard error: 15.88 on 1168 degrees of freedom
#Multiple R-squared: 0.2524,  Adjusted R-squared: 0.2492 
#F-statistic: 78.88 on 5 and 1168 DF,  p-value: < 2.2e-16
###### COMPARISON OF THE MODELS WITHOUT INTERACTIONS
# Sequential F tests along the nested chain
# modelo1 (4 predictors) -> modelo1c (+ parity) -> modelo1b (+ age).
anova(
  modelo1,
  modelo1c,
  modelo1b
)
# Recorded output:
#Analysis of Variance Table

#Model 1: bwt ~ gestation + smoke + height + weight
#Model 2: bwt ~ gestation + smoke + height + weight + parity
#Model 3: bwt ~ gestation + smoke + height + weight + parity + age
#  Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
#1   1169 294808                                
#2   1168 292412  1   2395.54 9.5606 0.002035 ** ### significant
#3   1167 292409  1      2.73 0.0109 0.916956   
### Ignoring interactions, the best model is modelo1c:
###   bwt ~ gestation + smoke + height + weight + parity  (data = RN)

#### WITH INTERACTIONS
####